library(tidyverse)
library(here)
library(phyloseq)
library(vegan)
theme_set(theme_bw())
filt_fun <- function (x, min_reads = 100, min_samples = 5) {
(sum(x) > min_reads) & (sum(x > 0) > min_samples)
}
ps <- readRDS(here('data','following_study','ps_rarefied.rds')) %>%
filter_taxa(filt_fun, prune = TRUE)
sample_data(ps)[is.na(sample_data(ps))] <- 'NA'
sam <- data.frame(sample_data(ps))
max.core <- parallel::detectCores()
alpha.diversity <- data.frame(sample_data(ps),estimate_richness(ps),read_depth = rowSums(otu_table(ps)))
ggplot(alpha.diversity,aes(x = Household, y = Shannon, group = Household)) +
geom_point() + geom_line()
anova(lm(Shannon~Household, data = alpha.diversity))
## Analysis of Variance Table
##
## Response: Shannon
## Df Sum Sq Mean Sq F value Pr(>F)
## Household 48 8.4359 0.17575 1.566 0.06064 .
## Residuals 49 5.4992 0.11223
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(alpha.diversity,aes(x = Epileptic.or.Control, y = Shannon)) +
geom_boxplot() + geom_jitter(height = 0, width = 0.25)
Paired t test
epileptic <- alpha.diversity %>% filter(Epileptic.or.Control == 'Epileptic')
control <- alpha.diversity %>% filter(Epileptic.or.Control == 'Control')
t.test(epileptic$Shannon, control$Shannon, paired = TRUE)
##
## Paired t-test
##
## data: epileptic$Shannon and control$Shannon
## t = -0.22662, df = 48, p-value = 0.8217
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.1529073 0.1219302
## sample estimates:
## mean difference
## -0.01548853
ANOVA with Household effect added
anova(lm(Shannon~Household + Epileptic.or.Control, data = alpha.diversity))
## Analysis of Variance Table
##
## Response: Shannon
## Df Sum Sq Mean Sq F value Pr(>F)
## Household 48 8.4359 0.175747 1.5357 0.07042 .
## Epileptic.or.Control 1 0.0059 0.005877 0.0514 0.82168
## Residuals 48 5.4933 0.114444
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(alpha.diversity) +
geom_point(aes(x = Breed.Group..1., y = Shannon, colour = Breed.Group..1.)) +
facet_wrap(~Epileptic.or.Control) +
theme(axis.text.x = element_blank(), axis.ticks.x.bottom = element_blank())
# here breed group with na is removed
anova(lm(Shannon~Household + Breed.Group..1., data = alpha.diversity %>% filter(Breed.Group..1. != 'NA')))
## Analysis of Variance Table
##
## Response: Shannon
## Df Sum Sq Mean Sq F value Pr(>F)
## Household 45 7.2456 0.16101 1.6626 0.05393 .
## Breed.Group..1. 4 1.3804 0.34511 3.5637 0.01431 *
## Residuals 39 3.7768 0.09684
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
alpha.diversity.epi <- alpha.diversity %>% filter(Epileptic.or.Control == 'Epileptic')
ggplot(alpha.diversity.epi, aes(x = Pheno.Y.N, y = Shannon)) +
geom_boxplot() + geom_jitter(height = 0, width = 0.25)
t.test(Shannon~Pheno.Y.N, data = alpha.diversity.epi)
##
## Welch Two Sample t-test
##
## data: Shannon by Pheno.Y.N
## t = 0.84674, df = 33.509, p-value = 0.4031
## alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
## 95 percent confidence interval:
## -0.1450834 0.3521435
## sample estimates:
## mean in group No mean in group Yes
## 2.730402 2.626872
ggplot(alpha.diversity, aes(x = Sex, y = Shannon)) +
geom_boxplot() + geom_jitter(height = 0, width = 0.25)
anova(lm(Shannon~Household + Sex, data = alpha.diversity))
## Analysis of Variance Table
##
## Response: Shannon
## Df Sum Sq Mean Sq F value Pr(>F)
## Household 48 8.4359 0.17575 1.6253 0.04789 *
## Sex 1 0.3088 0.30882 2.8560 0.09752 .
## Residuals 48 5.1903 0.10813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.test(Shannon~Sex, data = alpha.diversity)
##
## Welch Two Sample t-test
##
## data: Shannon by Sex
## t = -1.315, df = 87.559, p-value = 0.1919
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## -0.25315537 0.05154705
## sample estimates:
## mean in group F mean in group M
## 2.658970 2.759775
ggplot(alpha.diversity, aes(x = as.numeric(Age..months.), y = Shannon)) +
geom_point()
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
lm(Shannon~as.numeric(Age..months.), data = alpha.diversity) %>% summary()
## Warning in eval(predvars, data, env): NAs introduced by coercion
##
## Call:
## lm(formula = Shannon ~ as.numeric(Age..months.), data = alpha.diversity)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.92260 -0.28131 -0.01761 0.27021 0.78472
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6317958 0.0896466 29.357 <2e-16 ***
## as.numeric(Age..months.) 0.0007684 0.0010859 0.708 0.481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3742 on 94 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.005298, Adjusted R-squared: -0.005284
## F-statistic: 0.5007 on 1 and 94 DF, p-value: 0.4809
ord <- ps %>% ordinate(method = "MDS", distance = 'unifrac')
plot_ordination(ps, ord)
The PCoA plot shows a very separate pattern of four clusters. Let’s take a look at the phlogenetic tree of our data.
plot_tree(ps, "treeonly")
Some of the taxa have extremely long branch. Are they influential?
# Warning: Not all nodes are descendants in this tree
if (require(adephylo)) {dist.root <- adephylo::distRoot(phy_tree(ps))}
## Loading required package: adephylo
## Loading required package: ade4
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
hist(dist.root); dist.root %>% sort(decreasing = TRUE) %>% head(20)
## ASV601 ASV438 ASV563 ASV287 ASV484 ASV133 ASV562 ASV437
## 20.819315 5.082379 4.338564 4.267155 4.245708 4.159508 4.154919 4.121791
## ASV450 ASV286 ASV478 ASV357 ASV446 ASV454 ASV423 ASV380
## 4.088335 4.077083 4.074518 4.060563 4.045067 4.041199 4.026320 1.631610
## ASV213 ASV644 ASV268 ASV41
## 1.617836 1.569113 1.417223 1.411706
detach('package:adephylo', character.only = TRUE, unload = TRUE, force = TRUE)
Let’s see which ASV has distance from root longer than 10.
long.branch <- dist.root[dist.root > 5]
long.branch
## ASV601 ASV438
## 20.819315 5.082379
Are these ASVs special?
dada2:::pfasta(refseq(ps)[names(long.branch)], ids = names(long.branch))
## >ASV601
## GGAGAGCCATGTCTTATAAGTATACCATACTCTTTTAACTTCTTAATGTTCTTTTCTATGCTTCGTATTCGAAAGAAATAAATTCACATTTTTTTCTGTGTATCAGTTAATTCATAATCCTGAAATCTCGGCTTCGATAGTCCATACTCTCTCTTATCCAGAAACACTGCACGATCTGTTCCCTTGAATACCGCACATTGTGTTTTAGCGAATGAAAAATAGTCCGATGTCAGCAACGCATAGGCATTCGTTGCCAGAAGCTGTCCCTCACTCTGCTTTAAAATCTTCCACCAACATC
## >ASV438
## CAAATCCGAACTGCAAAATACTCATGGAAAGAATCGCTCCTGTTTTTATTCCTTCTCCTAACATGGTAAGAACAGAAACTGGCACTCCAAATACAGAAAAACCACAAAGCATACAAATAAAAAGCCATCCGCCTCTCTGTGCCAGAACCTGGAAAAAATATTCTTTTCCAGAAACATCTTCCCCCGCAAAAGCACCCAAAAGGTAAAATACATGGACTGTTTCCTGTTTCCACTGCATCTTCCACAAAAGATTTGGAAGAACCGTTCCC
Result returned from BLAST (web interface):
| ASV | Taxonomy | % identity |
|---|---|---|
| ASV601 | Anaerobutyricum hallii | 95.1 |
| ASV438 | uncultured Blautia sp. | 97.3 |
Now we remove this outlier ASVs
all.taxa <- taxa_names(ps)
keep.taxa <- all.taxa[!(all.taxa %in% names(long.branch))]
ps.sub <- ps %>% prune_taxa(keep.taxa, .)
Convert ps.sub to proportion
ps.sub <- ps.sub %>% transform_sample_counts(function(x) x / sum(x))
sample_sums(ps.sub) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
plot_ord <- function(data, factor, method, distance){
data.ord <- ordinate(data, method = method, distance = distance)
p <- plot_ordination(data, data.ord, color = factor)
p <- p + stat_ellipse(type = "t",geom = "polygon",alpha = 0)
p <- p + ggtitle(str_c(factor,method,distance, sep = ' - '))
print(p)
}
permanova <- function(data, formula, method, permutations=1e4, strata = NULL, core = max.core){
dist.matrix <- phyloseq::distance(data, method=method)
df <- data.frame(sample_data(data))
model <- as.formula(paste0('dist.matrix~', formula))
if (!is_null(strata)) {strata <- df[,strata]}
result <- adonis2(model,
data = df,
permutations=permutations,
strata = strata,
parallel = core)
return(result)
}
permdisp <- function(data, group, method, permutations=1e4, pairwise = FALSE, core = max.core){
dist.matrix <- phyloseq::distance(data, method=method)
df <- data.frame(sample_data(data))
beta.disp <- betadisper(dist.matrix, group = df[,group])
result <- permutest(beta.disp, permutations = permutations, pairwise = pairwise, type = 'centroid')
return(result)
}
permanova(ps.sub, 'Household', 'bray')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 11.7188 0.68925 2.2643 9.999e-05 ***
## Residual 49 5.2834 0.31075
## Total 97 17.0021 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_ord(ps.sub, 'Epileptic.or.Control','MDS','bray')
plot_ord(ps.sub, 'Epileptic.or.Control','MDS','wunifrac')
plot_ord(ps.sub, 'Epileptic.or.Control','NMDS','bray')
## Run 0 stress 0.2084864
## Run 1 stress 0.2169365
## Run 2 stress 0.2143554
## Run 3 stress 0.2222207
## Run 4 stress 0.2029839
## ... New best solution
## ... Procrustes: rmse 0.0417085 max resid 0.2727279
## Run 5 stress 0.2030693
## ... Procrustes: rmse 0.003774634 max resid 0.02557333
## Run 6 stress 0.2042559
## Run 7 stress 0.2041852
## Run 8 stress 0.2045934
## Run 9 stress 0.2088616
## Run 10 stress 0.2227023
## Run 11 stress 0.2333378
## Run 12 stress 0.2093196
## Run 13 stress 0.2126081
## Run 14 stress 0.2031838
## ... Procrustes: rmse 0.00802472 max resid 0.04304131
## Run 15 stress 0.2037942
## Run 16 stress 0.20302
## ... Procrustes: rmse 0.003164349 max resid 0.02507692
## Run 17 stress 0.2091759
## Run 18 stress 0.2096705
## Run 19 stress 0.2092369
## Run 20 stress 0.2039328
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 4: no. of iterations >= maxit
## 16: stress ratio > sratmax
plot_ord(ps.sub, 'Epileptic.or.Control','NMDS','wunifrac')
## Run 0 stress 0.1726183
## Run 1 stress 0.1848912
## Run 2 stress 0.1861148
## Run 3 stress 0.1813724
## Run 4 stress 0.1880015
## Run 5 stress 0.1888728
## Run 6 stress 0.1824362
## Run 7 stress 0.1876135
## Run 8 stress 0.1811159
## Run 9 stress 0.1706236
## ... New best solution
## ... Procrustes: rmse 0.02601623 max resid 0.1730982
## Run 10 stress 0.1779501
## Run 11 stress 0.1891485
## Run 12 stress 0.1697156
## ... New best solution
## ... Procrustes: rmse 0.02442171 max resid 0.1715692
## Run 13 stress 0.1785785
## Run 14 stress 0.1838331
## Run 15 stress 0.1774541
## Run 16 stress 0.193334
## Run 17 stress 0.1832153
## Run 18 stress 0.179066
## Run 19 stress 0.1718317
## Run 20 stress 0.1855432
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
permanova(ps.sub, 'Epileptic.or.Control', 'bray', strata = 'Household')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Epileptic.or.Control 1 0.1499 0.00882 0.854 0.1135
## Residual 96 16.8522 0.99118
## Total 97 17.0021 1.00000
permanova(ps.sub, 'Epileptic.or.Control', 'wunifrac', strata = 'Household')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Epileptic.or.Control 1 0.1149 0.01615 1.5756 0.0215 *
## Residual 96 6.9986 0.98385
## Total 97 7.1135 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.sub, 'Epileptic.or.Control', 'bray')
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.00251 0.0025142 0.1605 10000 0.6884
## Residuals 96 1.50379 0.0156645
permdisp(ps.sub, 'Epileptic.or.Control', 'wunifrac')
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.02183 0.0218264 2.2935 10000 0.1431
## Residuals 96 0.91358 0.0095165
a table of # of breed. frequency table
(data.frame(sample_data(ps.sub))$Breed.Group..1.) %>% table()
## .
## Herder NA Pointer Spaniel Retriever Scent Hound
## 20 9 26 26 5
## Sight Hound Sled Dog Terrier
## 3 6 3
plot_ord(ps.sub, 'Breed.Group..1.','MDS','bray')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot_ord(ps.sub, 'Breed.Group..1.','MDS','wunifrac')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot_ord(ps.sub, 'Breed.Group..1.','NMDS','bray')
## Run 0 stress 0.2084864
## Run 1 stress 0.2105635
## Run 2 stress 0.2099617
## Run 3 stress 0.2107293
## Run 4 stress 0.2053455
## ... New best solution
## ... Procrustes: rmse 0.03850885 max resid 0.2582537
## Run 5 stress 0.2228343
## Run 6 stress 0.2059934
## Run 7 stress 0.2030101
## ... New best solution
## ... Procrustes: rmse 0.04140392 max resid 0.2782154
## Run 8 stress 0.2095771
## Run 9 stress 0.2060342
## Run 10 stress 0.231039
## Run 11 stress 0.2326269
## Run 12 stress 0.2160672
## Run 13 stress 0.2267309
## Run 14 stress 0.2122406
## Run 15 stress 0.2051884
## Run 16 stress 0.2367514
## Run 17 stress 0.2040126
## Run 18 stress 0.2089416
## Run 19 stress 0.2039733
## Run 20 stress 0.2029079
## ... New best solution
## ... Procrustes: rmse 0.00394192 max resid 0.02547275
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 6: no. of iterations >= maxit
## 14: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot_ord(ps.sub, 'Breed.Group..1.','NMDS','wunifrac')
## Run 0 stress 0.1726183
## Run 1 stress 0.183285
## Run 2 stress 0.1860284
## Run 3 stress 0.1829084
## Run 4 stress 0.1833238
## Run 5 stress 0.1827231
## Run 6 stress 0.1891264
## Run 7 stress 0.1815501
## Run 8 stress 0.1824815
## Run 9 stress 0.1763169
## Run 10 stress 0.1814126
## Run 11 stress 0.1814527
## Run 12 stress 0.1759098
## Run 13 stress 0.1964532
## Run 14 stress 0.1789556
## Run 15 stress 0.1885385
## Run 16 stress 0.183834
## Run 17 stress 0.1706237
## ... New best solution
## ... Procrustes: rmse 0.02601988 max resid 0.1731015
## Run 18 stress 0.1859179
## Run 19 stress 0.1859753
## Run 20 stress 0.1775934
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# same breed group v.s. diff breed group in household
# check if data are paired within household
if (!identical(sample_data(ps)$Household[seq(1,98,2)],
sample_data(ps)$Household[seq(1,98,2)+1])) stop('data is not paired')
same.breed.group <- table(sample_data(ps)$Household, sample_data(ps)$Breed.Group..1.) %>%
apply(1, function(x) any(x == 2)) # if dog's grouping are same with in each house
same.breed.group;sum(same.breed.group)
## 1 10 11 12 13 14 15 16 17 18 19 2 20
## FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## 21 22 23 24 25 26 27 28 29 3 30 31 32
## TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## 33 34 35 36 38 39 4 40 41 42 43 44 45
## TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## 47 48 49 5 50 51 6 7 8 9
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [1] 39
Here we see 39 out of 49 household have dogs in same breed group
permanova(ps.sub, 'Household + Breed.Group..1.', 'bray')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 11.7188 0.68925 2.3509 9.999e-05 ***
## Breed.Group..1. 6 0.8179 0.04811 1.3127 0.07059 .
## Residual 43 4.4655 0.26264
## Total 97 17.0021 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permanova(ps.sub, 'Household + Breed.Group..1.', 'wunifrac')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 4.8365 0.67991 2.2564 9.999e-05 ***
## Breed.Group..1. 6 0.3568 0.05015 1.3316 0.1238
## Residual 43 1.9202 0.26993
## Total 97 7.1135 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Here we
ps.control <- ps.sub %>% subset_samples(Epileptic.or.Control == 'Control')
ps.epileptic <- ps.sub %>% subset_samples(Epileptic.or.Control == 'Epileptic')
perm.breed.bray.ctl <- permanova(ps.control, 'Breed.Group..1.', 'bray')
perm.breed.wunif.ctl <- permanova(ps.control, 'Breed.Group..1.', 'wunifrac')
perm.breed.bray.ctl;perm.breed.wunif.ctl
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Breed.Group..1. 7 1.6962 0.19479 1.4169 0.0313 *
## Residual 41 7.0114 0.80521
## Total 48 8.7076 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Breed.Group..1. 7 0.8423 0.21377 1.5925 0.0317 *
## Residual 41 3.0977 0.78623
## Total 48 3.9400 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perm.breed.bray.epi <- permanova(ps.epileptic, 'Breed.Group..1.', 'bray')
perm.breed.wunif.epi <- permanova(ps.epileptic, 'Breed.Group..1.', 'wunifrac')
perm.breed.bray.epi;perm.breed.wunif.epi
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Breed.Group..1. 7 1.2777 0.15688 1.0898 0.2592
## Residual 41 6.8669 0.84312
## Total 48 8.1446 1.00000
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Breed.Group..1. 7 0.55214 0.18052 1.2902 0.1047
## Residual 41 2.50647 0.81948
## Total 48 3.05861 1.00000
\[X^2=-2 \sum_{i=1}^k \ln \left(p_i\right)\]
bray.p <- c(perm.breed.bray.ctl$`Pr(>F)`[1], perm.breed.bray.epi$`Pr(>F)`[1])
# Calculate the combined p-value with fisher's method
pchisq( -2 * sum(log(bray.p)), df = 4, lower.tail = FALSE)
## [1] 0.04716331
wunif.p <- c(perm.breed.wunif.ctl$`Pr(>F)`[1], perm.breed.wunif.epi$`Pr(>F)`[1])
# Calculate the combined p-value with fisher's method
pchisq( -2 * sum(log(wunif.p)), df = 4, lower.tail = FALSE)
## [1] 0.02226031
With using the weighted-unifrac distance, there’s at
least one of the null hypothesis is rejected.
Here we are only focusing on epileptic dogs.
plot_ord(ps.epileptic, 'Pheno.Y.N','MDS','bray')
plot_ord(ps.epileptic, 'Pheno.Y.N','MDS','wunifrac')
plot_ord(ps.epileptic, 'Pheno.Y.N','NMDS','bray')
## Run 0 stress 0.2030888
## Run 1 stress 0.2165641
## Run 2 stress 0.2077776
## Run 3 stress 0.4014587
## Run 4 stress 0.2047078
## Run 5 stress 0.200431
## ... New best solution
## ... Procrustes: rmse 0.07235392 max resid 0.321837
## Run 6 stress 0.2093761
## Run 7 stress 0.2038619
## Run 8 stress 0.2102078
## Run 9 stress 0.2080904
## Run 10 stress 0.2004312
## ... Procrustes: rmse 0.001237062 max resid 0.006705547
## ... Similar to previous best
## Run 11 stress 0.2108405
## Run 12 stress 0.2060071
## Run 13 stress 0.2132366
## Run 14 stress 0.2017719
## Run 15 stress 0.2044891
## Run 16 stress 0.2163085
## Run 17 stress 0.2135044
## Run 18 stress 0.2054275
## Run 19 stress 0.2189759
## Run 20 stress 0.2114716
## *** Best solution repeated 1 times
plot_ord(ps.epileptic, 'Pheno.Y.N','NMDS','wunifrac')
## Run 0 stress 0.1857
## Run 1 stress 0.1963714
## Run 2 stress 0.1831746
## ... New best solution
## ... Procrustes: rmse 0.07582157 max resid 0.2412167
## Run 3 stress 0.2013383
## Run 4 stress 0.1914569
## Run 5 stress 0.1960532
## Run 6 stress 0.1943161
## Run 7 stress 0.1904314
## Run 8 stress 0.199011
## Run 9 stress 0.1942077
## Run 10 stress 0.1944728
## Run 11 stress 0.1853752
## Run 12 stress 0.1991867
## Run 13 stress 0.1943618
## Run 14 stress 0.198715
## Run 15 stress 0.1932262
## Run 16 stress 0.1874326
## Run 17 stress 0.192997
## Run 18 stress 0.2014187
## Run 19 stress 0.1814438
## ... New best solution
## ... Procrustes: rmse 0.03465726 max resid 0.2303
## Run 20 stress 0.1935535
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 19: stress ratio > sratmax
permanova(ps.epileptic, 'Pheno.Y.N','bray')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Pheno.Y.N 1 0.2090 0.02566 1.2379 0.1923
## Residual 47 7.9356 0.97434
## Total 48 8.1446 1.00000
permanova(ps.epileptic, 'Pheno.Y.N','wunifrac')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Pheno.Y.N 1 0.06486 0.02121 1.0182 0.4133
## Residual 47 2.99375 0.97879
## Total 48 3.05861 1.00000
permdisp(ps.epileptic, 'Pheno.Y.N','bray')
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.01951 0.019507 1.437 10000 0.2444
## Residuals 47 0.63801 0.013575
permdisp(ps.epileptic, 'Pheno.Y.N','wunifrac')
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.03162 0.0316221 4.2019 10000 0.0447 *
## Residuals 47 0.35370 0.0075256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# within epileptic dogs
table(sample_data(ps)$Seizure.Freedom..Y.N., sample_data(ps)$Epileptic.or.Control)
##
## Control Epileptic
## NA 49 3
## No 0 32
## Yes 0 14
table(sample_data(ps)$Seizure.Control..Satisfactory.Unsatisfactory., sample_data(ps)$Epileptic.or.Control)
##
## Control Epileptic
## NA 49 3
## S 0 33
## US 0 13
Does health condition related to dog’s sex? Here we conduct a Chi-squared test.
# chisq test on epi male dog v.s. female.
# not within health condition
tb <- table(sample_data(ps)$Sex, sample_data(ps)$Epileptic.or.Control)
tb
##
## Control Epileptic
## F 33 25
## M 16 24
chisq.test(tb)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tb
## X-squared = 2.0698, df = 1, p-value = 0.1502
plot_ord(ps, 'Sex','MDS','bray')
plot_ord(ps, 'Sex','MDS','wunifrac')
plot_ord(ps, 'Sex','NMDS','bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2261393
## Run 1 stress 0.2550509
## Run 2 stress 0.2288129
## Run 3 stress 0.2393938
## Run 4 stress 0.2330993
## Run 5 stress 0.2491796
## Run 6 stress 0.2252235
## ... New best solution
## ... Procrustes: rmse 0.02912235 max resid 0.1599846
## Run 7 stress 0.2354197
## Run 8 stress 0.2378738
## Run 9 stress 0.2396912
## Run 10 stress 0.258135
## Run 11 stress 0.2371867
## Run 12 stress 0.2427663
## Run 13 stress 0.2370327
## Run 14 stress 0.2349535
## Run 15 stress 0.2388403
## Run 16 stress 0.2499185
## Run 17 stress 0.2252235
## ... Procrustes: rmse 2.324353e-05 max resid 0.0001447854
## ... Similar to previous best
## Run 18 stress 0.2412595
## Run 19 stress 0.2379972
## Run 20 stress 0.2421599
## *** Best solution repeated 1 times
plot_ord(ps, 'Sex','NMDS','wunifrac')
## Run 0 stress 0.1724074
## Run 1 stress 0.1795168
## Run 2 stress 0.1839198
## Run 3 stress 0.180044
## Run 4 stress 0.1938782
## Run 5 stress 0.1807482
## Run 6 stress 0.1822171
## Run 7 stress 0.1796925
## Run 8 stress 0.1720206
## ... New best solution
## ... Procrustes: rmse 0.0105652 max resid 0.09906823
## Run 9 stress 0.1711419
## ... New best solution
## ... Procrustes: rmse 0.02878655 max resid 0.1695473
## Run 10 stress 0.1791825
## Run 11 stress 0.1695882
## ... New best solution
## ... Procrustes: rmse 0.02433374 max resid 0.1728809
## Run 12 stress 0.1722713
## Run 13 stress 0.1705113
## Run 14 stress 0.1754532
## Run 15 stress 0.1732358
## Run 16 stress 0.1860074
## Run 17 stress 0.1857264
## Run 18 stress 0.1720204
## Run 19 stress 0.1792622
## Run 20 stress 0.1901897
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
permanova(ps.sub, 'Household + Sex', 'bray')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 11.7188 0.68925 2.2419 9.999e-05 ***
## Sex 1 0.0561 0.00330 0.5156 0.9657
## Residual 48 5.2272 0.30745
## Total 97 17.0021 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permanova(ps.sub, 'Household + Sex', 'wunifrac')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 4.8365 0.67991 2.1305 9.999e-05 ***
## Sex 1 0.0068 0.00096 0.1443 0.9987
## Residual 48 2.2701 0.31913
## Total 97 7.1135 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.sub, 'Sex', 'bray')
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.01527 0.015267 0.9824 10000 0.3302
## Residuals 96 1.49198 0.015541
permdisp(ps.sub, 'Sex', 'wunifrac')
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.00068 0.000684 0.0672 10000 0.7972
## Residuals 96 0.97753 0.010183
sam$Age..months. <- as.numeric(sam$Age..months.)
ggplot(sam) +
geom_line(aes(x = as.numeric(Household), y = Age..months./12, group = Household)) +
geom_point(aes(x = as.numeric(Household), y = Age..months./12, colour = Epileptic.or.Control)) +
xlab('Household') + ylab('Age in Year')
sam.epi <- sam %>% filter(Epileptic.or.Control == 'Epileptic')
sam.ctl <- sam %>% filter(Epileptic.or.Control == 'Control')
age.diff <- data.frame(Household = sam.epi$Household, Age.Diff = sam.epi$Age..months. - sam.ctl$Age..months.)
ggplot(age.diff) +
geom_bar(aes(x = as.numeric(Household), y = Age.Diff/12), stat = 'identity') +
xlab('Household') + ylab('Age Difference in Year')
permanova(ps.sub, 'Household + Age..months.', 'bray')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 11.7188 0.68925 2.3061 9.999e-05 ***
## Age..months. 19 2.1074 0.12395 1.0477 0.355
## Residual 30 3.1760 0.18680
## Total 97 17.0021 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permanova(ps.sub, 'Household + Age..months.', 'wunifrac')
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = model, data = df, permutations = permutations, parallel = core, strata = strata)
## Df SumOfSqs R2 F Pr(>F)
## Household 48 4.8365 0.67991 2.2162 9.999e-05 ***
## Age..months. 19 0.9130 0.12835 1.0569 0.3698
## Residual 30 1.3639 0.19174
## Total 97 7.1135 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1